# 基本模块
import json
import os
import urllib.parse
from urllib.parse import urlparse, parse_qs

import pandas as pd
import requests_html
from requests_html import HTMLSession


# 1. Work out the URL structure
# Fetch the 51job home page; the same session is reused for every later request.
url = "https://www.51job.com/"
session = HTMLSession()
r = session.get(url)

# XPath selecting the href of each category link in the home page's "cn hlist" block.
行业分类_xpath = '//div[@class="cn hlist"]//div[@class="e"]//p[@class="tie"]//a/@href'

# Reference links, in the same order as the labels in 对比链接_str below:
# two hard-coded sample search URLs first, followed by the links scraped
# from the home page.
对比链接列表 = [
    'https://search.51job.com/list/030200,000000,0000,32,9,99,%25E4%25BA%25A7%25E5%2593%2581%25E7%25BB%258F%25E7%2590%2586,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=',
    'https://search.51job.com/list/030200,000000,7300,32,9,99,%2B,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=',
    *r.html.xpath(行业分类_xpath),
]

对比链接_str = ["关键词","职能","互联网/电子商务","金融/投资/证券","汽车","房地产"]

# One column per label; each column holds the comma-separated pieces of the
# matching link so the positions that differ between links can be compared.
对比链接_dict = {
    label: link.split(',')
    for label, link in zip(对比链接_str, 对比链接列表)
}
对比链接_df = pd.DataFrame(对比链接_dict)

# 对比链接_df

# 职能
# 对比链接_df.loc[[2]]

# 行业
# 对比链接_df.loc[[3]]

# 关键词
# 对比链接_df.loc[[6]]


# 2. Industry categories
# Industry code -> industry name. Codes with a leading zero are kept as
# strings; the others are plain integers.
行业分类_str = {
    "01": "计算机软件",
    37: "计算机硬件",
    38: "计算机服务(系统、数据服务、维修)",
    31: "通信/电信/网络设备",
    39: "通信/电信运营、增值服务",
    32: "互联网/电子商务",
    40: "网络游戏",
    "02": "电子技术/半导体/集成电路",
    35: "仪器仪表/工业自动化",
    41: "会计/审计",
    "03": "金融/投资/证券",
    42: "银行",
    43: "保险",
    62: "信托/担保/拍卖/典当",
    "04": "贸易/进出口",
    22: "批发/零售",
    "05": "快速消费品(食品、饮料、化妆品)",
    "06": "服装/纺织/皮革",
    44: "家具/家电/玩具/礼品",
    60: "奢侈品/收藏品/工艺品/珠宝",
    45: "办公用品及设备",
    14: "机械/设备/重工",
    33: "汽车",
    65: "汽车零配件",
    "08": "制药/生物工程",
    46: "医疗/护理/卫生",
    47: "医疗设备/器械",
    12: "广告",
    48: "公关/市场推广/会展",
    49: "影视/媒体/艺术/文化传播",
    13: "文字媒体/出版",
    15: "印刷/包装/造纸",
    26: "房地产",
    "09": "建筑/建材/工程",
    50: "家居/室内设计/装潢",
    51: "物业管理/商业中心",
    34: "中介服务",
    63: "租赁服务",
    "07": "专业服务(咨询、人力资源、财会)",
    59: "外包服务",
    52: "检测，认证",
    18: "法律",
    23: "教育/培训/院校",
    24: "学术/科研",
    11: "餐饮业",
    53: "酒店/旅游",
    17: "娱乐/休闲/体育",
    54: "美容/保健",
    27: "生活服务",
    21: "交通/运输/物流",
    55: "航天/航空",
    19: "石油/化工/矿产/地质",
    16: "采掘业/冶炼",
    36: "电气/电力/水利",
    61: "新能源",
    56: "原材料和加工",
    28: "政府/公共事业",
    57: "非营利组织",
    20: "环保",
    29: "农/林/牧/渔",
    58: "多元化业务集团公司",
}

# Reverse lookup used when building URLs: industry name -> industry code.
行业_参数构建 = dict(zip(行业分类_str.values(), 行业分类_str.keys()))


# 三、职能分类
职能分类_str = {"0100":"后端开发","0121":"Java开发工程师","0120":"PHP开发工程师","0122":"C/C++开发工程师","0124":"Python开发工程师","0126":".NET开发工程师","0153":"C#开发工程师","0151":"Ruby开发工程师","0152":"Go开发工程师","0130":"大数据开发工程师","0129":"Hadoop工程师","0131":"爬虫开发工程师","0132":"脚本开发工程师","0133":"多媒体开发工程师","0155":"GIS工程师","0154":"全栈工程师","0117":"ERP技术开发","0128":"区块链开发","0106":"高级软件工程师","0107":"软件工程师","0143":"系统架构设计师","0123":"系统分析员","0149":"技术文员/助理","0150":"技术文档工程师","0142":"其他",7700:"移动开发",7701:"Android开发工程师",7702:"iOS开发工程师",7705:"小程序开发工程师",7703:"移动开发工程师",7704:"其他",7200:"前端开发",7201:"Web前端开发",7202:"HTML5开发工程师",7203:"其他",7300:"人工智能",7301:"机器学习工程师",7302:"深度学习工程师",7303:"图像算法工程师",7304:"图像处理工程师",7305:"图像识别工程师",7306:"语音识别工程师",7307:"机器视觉工程师",7308:"自然语言处理(NLP)",7309:"算法工程师",7310:"推荐算法工程师",7311:"搜索算法工程师",7312:"其他",7800:"游戏",7801:"游戏策划师",7802:"游戏系统策划",7803:"游戏数值策划",7804:"游戏关卡策划",7805:"游戏文案策划/剧情策划",7806:"游戏界面设计师",7820:"游戏角色设计师",7817:"游戏特效设计师",7822:"UE4特效师",7818:"游戏动作设计师",7819:"游戏场景设计师",7807:"游戏原画师",7808:"游戏动画师",7809:"游戏开发工程师",7810:"Cocos2d-x开发工程师",7811:"Unity3d开发工程师",7823:"UE4开发工程师",7812:"游戏客户端开发工程师",7813:"游戏服务端开发工程师",7821:"游戏测试",7814:"游戏运营",7815:"电子竞技运营",7816:"其他",7400:"视觉/交互设计",7420:"平面设计总监",7421:"平面设计经理/主管",7413:"平面设计师",7419:"美工/电商设计师",7412:"UI设计师",7403:"视觉设计师",7401:"网页设计师",7404:"用户体验（UE/UX）设计师",7402:"交互设计师",7422:"动画/3D设计",7407:"特效设计师",7418:"原画师",7417:"绘画",7416:"多媒体设计",7406:"Flash设计师",7408:"音效设计师",7409:"计算机辅助设计工程师",7410:"仿真应用工程师",7405:"网站架构设计师",7411:"其他","0900":"工业/艺术设计","0919":"工业设计/产品设计","0927":"包装设计","0925":"展览/展示/店面设计","0940":"家具设计","0941":"家居设计","0934":"照明设计","0936":"陈列设计","0920":"工艺品/珠宝设计鉴定","0929":"玩具设计","0921":"其他",2700:"测试",2707:"软件测试工程师",2718:"功能测试",2719:"性能测试",2724:"安全测试",2720:"自动化测试",2721:"移动端测试",2722:"测试开发",2726:"测试总监",2705:"测试经理",2723:"测试主管",2706:"系统测试",2704:"标准化工程师",2725:"测试工程师",2711:"其他",7900:"运维/技术支持",7901:"运维工程师",7920:"自动化运维工程师",7902:"系统工程师",7903:"数据库工程师(DBA)",7904:"系统集成工程师",7905:"ERP实施顾问",7906:"网络安全工程师",7915:"运维开发",7907:"网站维护工程师",7908:"技术支持/维护经理",7909:"技术支持/维护工程师",7910:"配置管理工程师",7912:"IT经理/IT主管",7913
:"网络工程师(IT工程师)",7914:"网络管理(Helpdesk)",7916:"网络维修",7917:"手机维修",7918:"电脑维修",7919:"其他",7500:"数据",7502:"数据分析经理/主管",7501:"数据分析师",7503:"ETL开发工程师",7504:"BI工程师",7505:"数据仓库工程师",7506:"数据采集工程师",7507:"数据建模工程师",7508:"数据治理工程师",7509:"其他",6600:"产品",6604:"产品总监",6602:"产品经理/主管",6605:"互联网产品经理",6606:"移动产品经理",6607:"用户产品经理",6608:"电商产品经理",6601:"产品专员",6603:"产品助理",6609:"需求工程师",6610:"其他",8e3:"运营",8050:"运营总监",8049:"运营经理",8048:"运营主管",8047:"运营专员",8053:"运营助理",8030:"网站运营总监",8003:"网站运营经理/主管",8016:"网站运营专员",8032:"网络推广总监",8033:"网络推广经理/主管",8034:"网络推广专员",8024:"SEO/SEM",8046:"信息流优化师",8010:"新媒体运营",8059:"直播运营",8055:"微信运营",8054:"微博运营",8041:"用户运营",8058:"社区/社群运营",8042:"活动运营",8043:"内容运营",8051:"品类运营",8044:"数据运营",8045:"线下运营",8057:"产品运营",8007:"网站编辑",8052:"内容审核",8006:"网站策划",8011:"其他",6100:"电子商务",6111:"电商总监",6110:"电商经理/电商主管",6109:"电商专员",6102:"电商运营",6112:"跨境电商运营",6101:"网店店长",6103:"网店店铺管理员",6104:"网店客服",6105:"店铺推广",6107:"网店模特",6108:"其他",2600:"技术管理",2611:"首席技术执行官CTO",2612:"首席信息官CIO",2602:"技术总监/经理",2605:"项目总监",2606:"项目经理",2607:"项目主管",2608:"项目执行/协调人员",2610:"项目助理",2609:"其他",6700:"半导体/芯片",6701:"集成电路IC设计/应用工程师",6727:"芯片架构工程师",6728:"FPGA开发工程师",6729:"MEMS工程师",6730:"射频芯片设计",6731:"模拟芯片工程师",6722:"版图设计工程师",6732:"模拟版图工程师",6733:"数字前端工程师",6702:"IC验证工程师",6734:"FPGA原型验证工程师",6735:"EDA工程师",6736:"可测性设计工程师(DFT)",6737:"数字后端工程师",6738:"芯片测试工程师",6712:"FAE 
现场应用工程师",6723:"半导体工艺工程师",6740:"工艺整合工程师(PIE)",6739:"半导体设备工程师",6741:"失效分析工程师(FA)",6760:"封装工程师",6744:"封装研发工程师",6750:"半导体测试工程师",6746:"芯片销售工程师",6761:"半导体器件工程师",6747:"半导体文档工程师",6748:"半导体产品经理/产品工程师",6707:"半导体技术",6749:"其他",2900:"电子/电器/仪器仪表",2903:"电子工程师/技术员",2964:"PCB工程师",2917:"电子技术研发工程师",2909:"电子软件开发(ARM/MCU...)",2962:"电子元器件工程师",2951:"电子工艺工程师",2965:"SMT工程师",2959:"电子设备工程师",2920:"电子/电器维修工程师/技师",2910:"嵌入式软件开发(Linux/单片机/PLC/DSP…)",2919:"嵌入式硬件开发(主板机…)",2955:"硬件工程师",2956:"高级硬件工程师",2957:"硬件测试工程师",2904:"电气工程师/技术员",2966:"PLC工程师",2905:"电路工程师/技术员(模拟/数字)",2906:"电声/音响工程师/技术员",2911:"电池/电源开发",2914:"仪器/仪表/计量分析师",2958:"计量工程师",2918:"激光/光电子技术",2921:"变压器与磁电工程师",2913:"家用电器/数码产品研发",2908:"自动控制工程师/技术员",2963:"机器人调试工程师",2925:"安防系统工程师",2952:"电子销售工程师",2953:"电子文档工程师",2954:"电子产品经理/产品工程师",2916:"其他",2800:"通信技术开发及应用",2801:"通信技术工程师",2803:"无线通信工程师",2802:"有线传输工程师",2815:"射频工程师",2805:"数据通信工程师",2807:"通信网络工程师",2819:"核心网工程师",2818:"基站工程师",2820:"通信设备工程师",2808:"通信电源工程师",2804:"电信交换工程师",2814:"光通信工程师",2816:"通信测试工程师",2817:"通信销售工程师",2812:"通信文档工程师",2813:"通信产品经理/产品工程师",2821:"通信项目管理",2809:"其他","0200":"销售管理","0201":"销售总监","0202":"销售经理","0203":"销售主管","0232":"业务拓展主管/经理","0233":"渠道/分销总监","0207":"渠道/分销经理","0220":"渠道/分销主管","0235":"大客户管理","0208":"客户经理/主管","0230":"区域销售总监","0226":"区域销售经理","0236":"区域销售主管","0237":"城市经理","0234":"团购经理/主管","0231":"其他",3e3:"销售人员",3009:"大客户销售",3001:"销售代表",3014:"区域销售代表",3002:"渠道/分销专员",3003:"客户代表",3004:"销售工程师",3005:"电话销售",3017:"地推专员",3016:"门店销售",3015:"海外销售",3010:"网络销售/在线销售",3013:"直播销售",3008:"团购业务员",3006:"经销商",3011:"会籍顾问",3012:"销售助理",3007:"其他",3100:"销售行政及商务",3101:"销售行政经理/主管",3102:"销售行政专员",3108:"业务分析经理/主管",3109:"业务分析专员/助理",3103:"商务经理",3104:"商务主管/专员",3105:"商务助理",3106:"销售行政助理",3107:"其他",3200:"客服及支持",3201:"客服总监",3202:"客服经理",3203:"客服主管",3204:"客服专员/助理",3210:"客户关系经理/主管",3205:"售前/售后技术支持经理",3206:"售前/售后技术支持主管",3207:"售前/售后技术支持工程师",3208:"咨询热线/呼叫中心服务人员",3213:"网络/在线客服",3211:"投诉专员",3212:"VIP专员",3209:"其他","0400":"财务/审计/税务","0444":"首席财务官 
CFO","0401":"财务总监","0402":"财务经理","0458":"财务专员","0445":"财务顾问","0403":"财务主管/总账主管","0422":"财务助理/财务文员","0406":"财务分析经理/主管","0407":"财务分析员","0448":"固定资产会计","0404":"会计经理/会计主管","0405":"会计","0457":"会计助理","0408":"成本经理/成本主管","0409":"成本管理员","0414":"出纳员","0449":"资金经理/主管","0450":"资金专员","0410":"审计经理/主管","0419":"审计专员/助理","0411":"税务经理/税务主管","0412":"税务专员/助理","0446":"统计员","0443":"其他",3300:"金融/证券/期货/投资",3301:"证券/期货/外汇经纪人",3302:"证券分析师",3319:"期货分析师",3316:"量化研究",3320:"证券交易员",3303:"股票/期货操盘手",3304:"金融/经济研究员",3312:"金融产品经理",3315:"金融产品销售",3317:"机构业务销售",3322:"投资总监",3323:"投资经理",3341:"基金经理",3325:"投资顾问",3326:"理财顾问",3307:"投资银行业务",3313:"投资银行财务分析",3308:"融资经理/融资主管",3309:"融资专员",3318:"营业部总经理/副总经理",3314:"风险管理/控制",3324:"资产管理",3310:"拍卖/担保/典当业务",3311:"其他",2200:"银行",2207:"行长/副行长",2231:"银行客户总监",2223:"个人业务部门经理/主管",2224:"个人业务客户经理",2225:"公司业务部门经理/主管",2226:"公司业务客户经理",2227:"综合业务经理/主管",2228:"综合业务专员",2233:"理财经理",2208:"资产评估/分析",2209:"风险控制",2215:"信贷管理",3340:"催收",2229:"信审核查",2210:"进出口/信用证结算",2212:"外汇交易",2211:"清算人员",2213:"高级客户经理/客户经理",2214:"客户主管/专员",2230:"营业部大堂经理",2222:"信用卡销售",2232:"呼叫中心客服",2216:"银行柜员",2234:"小微信贷专员",2221:"其他",3400:"保险",3401:"保险精算师",3402:"保险产品开发/项目策划",3403:"保险业务经理/主管",3404:"保险经纪人/保险代理",3414:"保险电销",3407:"保险核保",3408:"保险理赔",3409:"保险客户服务/续期管理",3410:"保险培训师",3411:"保险内勤",3413:"契约管理",3405:"理财顾问/财务规划师",3406:"储备经理人",3415:"保险业务推动/督导",3412:"其他",3500:"生产/营运",3501:"工厂经理/厂长",3502:"总工程师/副总工程师",3513:"项目总监",3503:"项目经理/主管",3504:"项目工程师",3505:"营运经理",3506:"营运主管",3514:"生产总监",3507:"生产经理/车间主任",3509:"生产主管",3515:"生产领班/组长",3508:"生产计划/物料管理(PMC)",3512:"生产文员",3518:"生产跟单",3516:"设备主管",3510:"化验员",3517:"厂务",3511:"其他",3600:"质量安全",3601:"质量管理/测试经理(QA/QC经理)",3602:"质量管理/测试主管(QA/QC主管)",3603:"质量管理/测试工程师(QA/QC工程师)",3605:"可靠度工程师",3606:"故障分析工程师",3607:"认证工程师",3608:"体系工程师",3604:"质检员/测试员(QC)",3615:"审核员",3609:"环境/健康/安全经理/主管（EHS）",3610:"环境/健康/安全工程师（EHS）",3614:"安全员",3611:"供应商管理",3612:"采购材料、设备质量管理",3613:"其他","0500":"工程/机械/能源","0510":"技术研发经理/主管","0511":"技术研发工程师","0547":"产品工艺/制程工程师","0559":"产品规划工程师","0584":"项目管理","0512":"实验室负责人/工程师","0513":"工程/设备经理","0514":"
工程/设备主管","0515":"工程/设备工程师","0523":"工程/机械绘图员","0560":"工业工程师","0582":"材料工程师","0539":"机械工程师","0561":"结构工程师","0548":"模具工程师","0544":"机电工程师","0586":"机械设计","0587":"模具设计","0580":"维修经理/主管","0537":"维修工程师","0581":"装配工程师/技师","0562":"铸造/锻造工程师/技师","0563":"注塑工程师/技师","0564":"焊接工程师/技师","0565":"夹具工程师/技师","0566":"CNC工程师","0567":"冲压工程师/技师","0568":"锅炉工程师/技师","0569":"电力工程师/技术员","0570":"光源与照明工程","0583":"光伏系统工程师","0571":"汽车/摩托车工程师","0572":"船舶工程师","0575":"轨道交通工程师/技术员","0576":"飞机维修机械师","0573":"飞行器设计与制造","0577":"水利/水电工程师","0585":"空调/热能工程师","0578":"石油天然气技术人员","0579":"矿产勘探/地质勘测工程师","0574":"其他",7100:"汽车研发设计",7101:"汽车项目管理",7102:"汽车设计工程师",7103:"车身/造型设计",7104:"汽车结构工程师",7105:"内外饰工程师",7106:"汽车电子工程师",7107:"电气/电器工程师",7108:"附件系统工程师",7109:"动力总成工程师",7110:"发动机工程师",7111:"底盘工程师",7112:"汽车安全性能工程师",7113:"汽车试验工程师",7114:"新能源电池工程师",7115:"新能源电控工程师",7116:"新能源电机工程师",7117:"汽车标定工程师",7118:"发动机匹配工程师",7119:"车联网工程师",7120:"智能驾驶工程师",7121:"研发总监/部长/专家",7122:"其他",5400:"汽车制造",5404:"汽车质量工程师",5421:"供应商质量工程师",5422:"前期质量工程师",5423:"过程质量工程师",5424:"客户质量工程师",5406:"汽车装配工艺工程师",5425:"总装工程师",5426:"焊接工艺工程师",5427:"冲压工艺工程师",5428:"涂装工艺工程师",5411:"其他",5900:"汽车销售与服务",5903:"汽车销售/经纪人",5902:"售后服务/客户服务",5916:"汽车金融销售",5918:"汽车金融经理",5917:"汽车金融专员",5915:"车险定损/理赔",5907:"汽车修理工",5905:"汽车检验/检测",5906:"汽车装饰美容",5913:"汽车钣金",5914:"汽车喷漆",5912:"汽车电工",5908:"洗车工",5901:"4S店经理/维修站经理",5904:"二手车评估师",5910:"加油站工作员",5911:"其他",3700:"技工普工",3710:"普工/操作工",3701:"技工",3707:"叉车司机/铲车司机",3728:"吊车司机",3729:"挖掘机司机",3715:"组装工",3716:"包装工",3703:"焊工",3717:"氩弧焊工",3706:"电工",3718:"电力线路工",3719:"旋压工",3720:"仪表工",3721:"电镀工",3722:"喷塑工",3709:"水工",3723:"木工",3724:"漆工",3708:"空调工",3725:"电梯工",3726:"锅炉工",3730:"3D打印操作员",3727:"学徒工",3713:"其他",3800:"服装/纺织/皮革",3812:"服装/纺织设计总监",3801:"服装/纺织设计",3813:"服装/纺织/皮革工艺师",3802:"面料辅料开发",3803:"面料辅料采购",3804:"服装/纺织/皮革跟单",3814:"服装领班",3805:"服装纺织质检员(QA/QC)",3806:"板房/楦头/底格出格师",3811:"电脑放码员",3808:"纸样师/车板工",3809:"裁床",3807:"打样/制版",3815:"裁剪工",3816:"缝纫工",3817:"手缝工",3818:"烫工",3819:"样衣工",3820:"纺织工",3821:"针织工",3822:"配色工",3823:"印染工",3824:"漂染工",3825:"挡车工",3826:"整经工",3827:"细纱工",3828:"浆纱工",381
0:"其他",3900:"采购",3901:"采购总监",3902:"采购经理",3903:"采购主管",3904:"采购员",3905:"采购助理",3908:"买手",3909:"供应商开发",3907:"其他",4e3:"贸易",4001:"贸易/外贸经理/主管",4002:"贸易/外贸专员/助理",4009:"外贸销售",4003:"国内贸易人员",4004:"业务跟单经理",4005:"高级业务跟单",4006:"业务跟单",4007:"助理业务跟单",4008:"其他","0800":"物流/仓储","0827":"物流总监","0801":"物流经理","0802":"物流主管","0814":"物流专员/助理","0837":"物流销售","0828":"供应链总监","0825":"供应链经理","0826":"供应链主管/专员","0803":"物料经理","0804":"物料主管/专员","0808":"仓库经理/主管","0809":"仓库管理员","0840":"仓库文员","0834":"订单处理员","0810":"运输经理/主管","0833":"项目经理/主管","0829":"货运代理","0830":"集装箱业务","0832":"海关事务管理","0811":"报关与报检","0812":"单证员","0815":"船务/空运陆运操作","0813":"快递员","0838":"分拣员","0831":"调度员","0835":"安检员","0823":"仓储理货员","0836":"搬运工","0839":"装卸工","0824":"其他",4100:"生物/制药/医疗器械",4101:"生物工程/生物制药",4116:"化学分析测试员",4103:"医药技术研发管理人员",4104:"医药技术研发人员",4126:"医药学术推广",4105:"临床研究员",4106:"临床协调员",4127:"临床监查员",4123:"临床数据分析员",4107:"药品注册",4108:"药品生产/质量管理",4109:"药品市场推广经理",4110:"药品市场推广主管/专员",4120:"医药招商",4121:"政府事务管理",4122:"招投标管理",4111:"医药销售经理/主管",4112:"医药代表",4102:"医药销售人员",4117:"医疗器械注册",4124:"医疗器械研发",4118:"医疗器械生产/质量管理",4113:"医疗器械市场推广",4125:"医疗器械销售经理/主管",4114:"医疗器械销售代表",4119:"医疗器械维修人员",4115:"其他",5500:"化工",5501:"化工技术应用/化工工程师",5502:"化工实验室研究员/技术员",5503:"涂料研发工程师",5504:"配色技术员",5505:"塑料工程师",5506:"化妆品研发",5507:"食品/饮料研发",5509:"造纸研发",5508:"其他",1300:"医院/医疗/护理",1302:"医院管理人员",1328:"综合门诊/全科医生",1301:"内科医生",1317:"外科医生",1318:"专科医生",1319:"牙科医生",1337:"妇产科医生",1339:"眼科医生",1320:"美容整形师",1329:"医美咨询",1308:"麻醉医生",1327:"超声影像/放射科医师",1321:"理疗师",1322:"中医科医生",1313:"针灸/推拿",1325:"儿科医生",1309:"心理医生",1335:"心理咨询师",1314:"营养师",1330:"健康管理师",1304:"药库主任/药剂师",1310:"医学检验",1331:"核酸检测员",1323:"公共卫生/疾病控制",1333:"消毒员",1332:"防疫员",1324:"护理主任/护士长",1305:"护士/护理人员",1336:"导医",1315:"兽医",1326:"验光师",1311:"其他",4200:"广告",4201:"广告客户总监/副总监",4202:"广告客户经理",4203:"广告客户主管/专员",4205:"广告创意总监",4204:"广告创意/设计经理",4206:"广告创意/设计主管/专员",4212:"广告制作执行",4213:"广告销售",4211:"美术指导",4207:"文案/策划",4208:"企业/业务发展经理",4209:"企业策划人员",4210:"其他",4300:"公关/媒介",4315:"公关总监",4301:"公关经理",4302:"公关主管",4303:"公关专员",4304:"会务/会展经理",4305:"会务/会展主管",4306:"会务/会展专员",4
307:"媒介经理",4308:"媒介主管",4309:"媒介专员",4310:"公关/媒介助理",4312:"媒介销售",4313:"活动策划",4314:"活动执行",4311:"其他","0300":"市场/营销","0301":"市场/营销/拓展总监","0302":"市场/营销/拓展经理","0303":"市场/营销/拓展主管","0304":"市场/营销/拓展专员","0305":"市场助理","0340":"互联网营销师","0324":"市场分析/调研人员","0306":"产品/品牌经理","0307":"产品/品牌主管","0330":"产品/品牌专员","0308":"市场通路经理/主管","0335":"市场通路专员","0336":"市场企划经理/主管","0337":"市场企划专员","0310":"促销经理","0338":"选址拓展/新店开发","0329":"其他",4400:"影视/媒体",4401:"影视策划/制作人员",4402:"导演/编导",4417:"编剧",4418:"制片人",4403:"艺术/设计总监",4414:"艺术指导/舞台美术设计",4404:"经纪人/星探",4405:"主播/主持人",4406:"摄影师/摄像师",4411:"后期制作",4416:"视频剪辑",4407:"音效师",4408:"配音员",4415:"灯光师",4412:"放映经理/主管",4413:"放映员",4410:"其他",4500:"编辑出版",4501:"总编/副总编",4502:"编辑",4517:"作家/撰稿人",4503:"记者",4516:"电话采编",4504:"美术编辑",4505:"排版设计",4507:"出版/发行",4508:"其他",6900:"建筑规划与设计",6919:"室内设计总监",6918:"室内设计经理/主管",6901:"室内设计",6917:"室内设计师助理",6902:"软装设计",6903:"精装设计",6920:"家装顾问",6905:"建筑设计师",6906:"钢结构设计",6907:"幕墙设计",6908:"建筑结构设计",6909:"建筑制图/模型/渲染",6911:"暖通设计",6912:"给排水设计",6910:"建筑机电设计",6914:"园艺/园林/景观设计",6913:"城市规划设计",6904:"规划与设计",6916:"BIM工程师",6915:"其他",2100:"建筑工程与装潢",2101:"建筑工程师",2123:"高级建筑工程师/总工",2104:"给排水/暖通工程",2122:"幕墙工程师",2103:"建筑机电工程师",2125:"楼宇自动化",2126:"智能大厦/综合布线/安防/弱电",2146:"精装修工程师",2147:"房修工程师",2102:"结构/土木/土建工程师",2118:"公路/桥梁/港口/隧道工程",2119:"岩土工程",2120:"测绘/测量",2127:"开发报建",2105:"工程造价师/预结算经理",2124:"预结算员",2106:"建筑工程管理/项目经理",2133:"建筑项目助理",2121:"建筑工程验收",2107:"工程监理",2132:"市政工程师",2128:"合同管理",2129:"安全员",2130:"资料员",2111:"建筑安装施工员",2134:"砌筑工",2135:"瓦工",2136:"混凝土工",2137:"浇注工",2138:"钢筋工",2139:"木工",2140:"油漆工",2141:"电梯工",2142:"抹灰工",2143:"施工开料工",2144:"管道/暖通",2145:"工长",2112:"消防安全",2116:"其他",4600:"房地产开发",4601:"房地产项目/策划经理",4602:"房地产项目/策划主管/专员",4604:"房地产投资管理",4603:"房产项目配套工程师",4608:"房地产项目招投标",4610:"房地产投资分析",4611:"房地产资产管理",4612:"监察人员",4607:"其他",6e3:"房地产销售与中介",6009:"房地产销售经理/主管",6010:"房地产销售",6001:"房地产中介/置业顾问",6002:"房地产评估",6004:"房地产店长/经理",6007:"房地产内勤",6006:"房地产客服",6008:"其他",4700:"物业管理",4702:"物业管理经理",4714:"物业管理主管",4703:"物业管理专员/助理",4704:"招商/租赁/租售",4719:"写字楼运营",4717:"长租公寓管家/养老专员",4701:"高级物业顾问/物业顾问",4716:"前介工程师
",4705:"物业设施管理人员",4715:"物业机电维修工",4708:"物业机电工程师",4706:"物业维修员",4709:"停车管理员",4710:"保安经理",4711:"保安人员",4712:"保洁",4713:"绿化工",4707:"其他","0600":"人力资源","0601":"人事总监","0611":"HRBP","0602":"人事经理","0603":"人事主管","0604":"人事专员","0605":"人事助理","0606":"招聘经理/主管","0626":"招聘专员/助理","0607":"薪资福利经理/主管","0608":"薪资福利专员/助理","0627":"绩效考核经理/主管","0628":"绩效考核专员/助理","0609":"培训经理/主管","0610":"培训专员/助理/培训师","0629":"企业文化/员工关系/工会管理","0630":"人力资源信息系统专员","0635":"劳务派遣专员","0625":"其他","0700":"高级管理","0701":"首席执行官CEO/总裁/总经理","0707":"首席运营官COO","0702":"副总经理/副总裁","0704":"合伙人","0705":"总监/部门经理","0710":"策略发展总监","0711":"企业秘书/董事会秘书","0712":"投资者关系","0708":"办事处首席代表","0709":"办事处/分公司/分支机构经理","0703":"总裁助理/总经理助理","0706":"其他",2300:"行政/后勤",2301:"行政总监",2302:"行政经理/主管/办公室主任",2303:"行政专员/助理",2304:"经理助理/秘书",2311:"文员",2310:"党工团干事",2305:"前台接待/总机/接待生",2307:"图书管理员/资料管理员",2312:"档案管理员",2308:"电脑操作员/打字员",2306:"后勤",2309:"其他",1400:"咨询/顾问",1401:"专业顾问",1402:"咨询总监",1403:"咨询经理",1406:"专业培训师",1404:"咨询员",1409:"调研员",1408:"猎头/人才中介",1407:"情报信息分析人员",1405:"其他",1100:"律师/法务/合规",1101:"律师/法律顾问",1103:"律师助理",1111:"法务总监",1106:"法务经理",1102:"法务主管/专员",1107:"法务助理",1109:"合规经理",1110:"合规主管/专员",1108:"知识产权/专利/商标",1105:"其他",1200:"教师",1219:"英语老师",1220:"数学老师",1221:"语文老师",1222:"物理老师",1223:"化学老师",1224:"日语老师",1207:"幼教",1225:"早教老师",1228:"美术老师",1233:"钢琴老师",1226:"音乐老师",1216:"体育教师",1215:"其他外语老师",1218:"在线辅导老师",1209:"小学教师",1231:"初中教师",1232:"高中教师",1208:"大学教授",1211:"职业技术教师",1204:"讲师/助教",1205:"家教",1210:"兼职教师",1206:"其他",8100:"教育咨询",8101:"课程顾问",8102:"招生老师",8103:"学习规划师",8104:"留学顾问",8105:"其他",8200:"教育管理",8201:"校长",8202:"班主任/辅导员",8203:"院校教务管理人员",8204:"园长",8205:"教研组长/主管",8206:"教研员",8207:"教师培训/师训",8208:"其他",5700:"培训",5701:"培训督导",5702:"培训讲师",5703:"培训策划",5707:"培训产品开发",5704:"培训助理",5705:"其他",1e3:"科研",1002:"科研管理人员",1001:"科研人员",4800:"餐饮服务",4801:"餐饮店长/经理",4819:"餐饮大堂经理",4802:"餐厅领班",4803:"餐饮服务员",4806:"行政主厨/厨师长",4807:"中餐厨师",4820:"西餐厨师",4821:"日式厨师",4822:"面点师",4823:"西点师",4812:"厨师助理/学徒",4809:"茶艺师",4816:"咖啡师",4808:"调酒师/侍酒师/吧台员",4804:"礼仪/迎宾",4824:"餐饮预订员",4818:"餐饮收银员",4813:"配菜/打荷",4811:"传菜主管",4825:"传菜员",481
4:"洗碗工",4815:"送餐员",4817:"杂工",4810:"其他",4900:"酒店旅游",4901:"酒店/宾馆经理",4902:"酒店/宾馆销售",4916:"预订主管",4917:"预订员",4903:"酒店大堂经理",4905:"酒店前台",4912:"宴会管理",4915:"宾客服务经理",4904:"楼面经理",4906:"客房服务员/楼面服务员",4918:"健身房服务",4907:"行李员",4914:"管家部经理/主管",4908:"清洁服务人员",4919:"旅游产品销售",4920:"行程管理/计调",4921:"签证专员",4909:"导游/旅行顾问",4910:"票务",4913:"机场代表",4911:"其他",5e3:"美容保健",5018:"美容店长",5016:"美容培训师/导师",5001:"美容顾问",5019:"美容师",5002:"美容助理",5013:"彩妆培训师",5014:"专柜彩妆顾问(BA)",5020:"化妆师",5021:"造型师",5022:"美发店长",5004:"发型师",5005:"发型助理/学徒",5006:"美甲师",5017:"美体师",5003:"瘦身顾问",5023:"SPA 技师",5007:"按摩",5024:"足疗",5010:"宠物护理/美容",5011:"其他",5100:"百货零售",5101:"门店经理/店长",5122:"店长助理",5112:"品类管理",5114:"品牌/连锁招商管理",5115:"奢侈品业务",5102:"店员/营业员",5116:"珠宝销售顾问",5117:"促销主管/督导/巡店",5123:"导购管理",5105:"促销员/导购员",5103:"收银主管",5119:"收银员",5124:"陈列管理",5104:"陈列员",5120:"收货员",5121:"理货员",5130:"商品管理",5113:"安防主管",5108:"防损员/内保",5109:"西点师/面包糕点加工",5110:"生鲜食品加工/处理",5111:"熟食加工",5106:"兼职店员",5107:"其他",1800:"交通运输服务",1822:"飞机机长/副机长",1823:"空乘人员",1825:"列车/地铁车长",1827:"船长/副船长",1810:"商务司机",1830:"客运司机",1831:"货运司机",1832:"出租车司机",1833:"班车司机",1826:"列车/地铁司机",1835:"特种车司机",1839:"驾校教练",1840:"代驾",1824:"地勤人员",1801:"乘务员",1828:"船员",1829:"其他",5200:"家政保洁",5206:"家政服务/保姆",5209:"月嫂",5210:"育婴师/保育员",5211:"护工",5205:"清洁工",5212:"钟点工",5213:"洗衣工",5214:"送水工",5202:"保镖",5215:"空调维修",5216:"家电维修",5203:"寻呼员/话务员",5207:"其他",1500:"政府/非盈利机构",1501:"公务员",1502:"志愿者/社会工作者",1503:"城市管理网格员",2e3:"翻译",2001:"英语翻译",2002:"日语翻译",2003:"德语翻译",2004:"法语翻译",2005:"俄语翻译",2010:"意大利语翻译",2006:"西班牙语翻译",2011:"葡萄牙语翻译",2009:"阿拉伯语翻译",2007:"韩语/朝鲜语翻译",2012:"泰语翻译",2013:"中国方言翻译",2008:"其他语种翻译",1600:"在校学生",1605:"研究生",1602:"大学/大专应届毕业生",1601:"中专/职校生",1604:"其他",1700:"储备干部/培训生/实习生",1702:"储备干部",1701:"培训生",1703:"实习生",5300:"兼职",5301:"兼职",5600:"环保",5601:"环保工程师",5604:"环境影响评价工程师",5609:"生态治理/规划",5605:"环保检测",5606:"水质检测员",5602:"水处理工程师",5607:"固废工程师",5608:"废气处理工程师",5603:"其他",5800:"农/林/牧/渔",5801:"养殖部主管",5802:"场长(农/林/牧/渔业)",5803:"农艺师",5804:"畜牧师",5805:"饲养员",5808:"农业技术员",5806:"动物营养/饲料研发",5810:"驯兽师/助理驯兽师",5807:"其他",6200:"机械机床",6201:"数控操机",6202:"数控编程",6203:"机修工
",6204:"折弯工",6205:"车工",6206:"磨工",6207:"铣工",6208:"冲压工",6209:"刨工",6210:"钳工",6211:"钻工",6212:"镗工",6213:"铆工",6214:"钣金工",6215:"抛光工",6216:"切割技工",6217:"模具工",6218:"炼胶工",6219:"硫化工",6220:"吹膜工",6221:"注塑工",6222:"其他",6300:"印刷包装",6301:"印刷工",6302:"校对/录入",6304:"调色员",6305:"烫金工",6306:"晒版员",6307:"印刷排版/制版",6308:"装订工",6309:"印刷机械机长",6310:"数码直印/菲林输出",6311:"调墨技师",6312:"电分操作员",6313:"打稿机操作员",6314:"切纸机操作工",6315:"裱胶工",6316:"压痕工",6317:"复卷工",6318:"其他",6400:"运动健身",6401:"健身顾问/教练",6402:"瑜伽老师",6403:"舞蹈老师",6404:"游泳教练",6405:"救生员",6406:"高尔夫教练",6407:"体育运动教练",6408:"其他",6500:"休闲娱乐",6512:"网络主播",6513:"主播助理",6514:"带货主播",6501:"司仪",6502:"婚礼/庆典策划服务",6503:"DJ",6504:"驻唱/歌手",6505:"舞蹈演员",6506:"模特",6507:"演员/群众演员",6509:"娱乐领班",6510:"娱乐服务员",6511:"前台迎宾",6508:"其他"}
# Reverse lookup used when building URLs: job-function name -> function code.
# Some codes in 职能分类_str are written as float literals (8e3, 3e3, ...);
# they are converted to int here so that str(code) yields "8000" rather than
# "8000.0" when the code is placed into a URL.
# Names that occur more than once in the table (e.g. "其他") keep only the
# code of their last occurrence.
职能_参数构建 = {
    value: int(key) if isinstance(key, float) else key
    for key, value in 职能分类_str.items()
}


# 4. URL generation (job function, industry and keyword)
# The first link scraped from the home page (index 2, after the two
# hard-coded sample links) serves as the template for generated URLs.
参数模版 = urllib.parse.urlparse(对比链接列表[2])
# The six URL components as a mutable list, ready for urlunparse().
参数模版_list = list(参数模版)
# The path component split on commas; individual positions are overwritten
# when a search URL is generated.
行业参数 = 参数模版.path.split(',')

def _format_code(code):
    """Render a lookup-table code as the text placed in the URL path."""
    # Codes written as float literals (e.g. 8e3) must not become "8000.0".
    if isinstance(code, float) and code.is_integer():
        return str(int(code))
    return str(code)


def url_参数模板生成(function: str, industries: str, keyword: str) -> str:
    """Build a search URL for a job function, an industry and a keyword.

    The URL is derived from the module-level template (``参数模版_list`` and
    ``行业参数``): positions 2, 3 and 6 of the comma-separated path are
    replaced by the function code, the industry code and the percent-encoded
    keyword. The template lists themselves are left unmodified.

    Args:
        function: Job-function name; must be a key of ``职能_参数构建``.
        industries: Industry name; must be a key of ``行业_参数构建``.
        keyword: Free-text search keyword; percent-encoded once with
            ``urllib.parse.quote``.

    Returns:
        The complete search URL as a string.

    Raises:
        KeyError: If ``function`` or ``industries`` is not in its lookup table.
    """
    # NOTE(review): the hard-coded keyword sample URL earlier in this file
    # carries a doubly percent-encoded keyword (%25E4...), whereas this
    # function encodes once -- confirm which form the site expects.
    path_fields = list(行业参数)  # copy so the shared template stays intact
    path_fields[2] = _format_code(职能_参数构建[function])
    path_fields[3] = _format_code(行业_参数构建[industries])
    path_fields[6] = urllib.parse.quote(keyword)

    url_parts = list(参数模版_list)  # copy for the same reason
    url_parts[2] = ",".join(path_fields)
    return urllib.parse.urlunparse(url_parts)

generate_url = url_参数模板生成("产品经理/主管","互联网/电子商务","互联网")


# 5. Pagination and fetching the listing pages
def _load_search_result(response):
    """Return the ``__SEARCH_RESULT__`` payload embedded in a listing page.

    Scans the page's <script> elements for the one containing the
    ``__SEARCH_RESULT__ = `` assignment and decodes the text that follows it.

    Raises:
        ValueError: If no <script> element contains the marker.
    """
    marker = '__SEARCH_RESULT__ = '
    for script in response.html.xpath('//script'):
        source = script.html
        if marker in source:
            payload = source.split(marker)[1].split('</script>')[0]
            # SECURITY: this text comes from the network and was previously
            # passed to eval(); it is decoded as JSON instead so that no
            # downloaded code is ever executed.
            return json.loads(payload)
    raise ValueError("no <script> element contains '__SEARCH_RESULT__ = '")


# Columns kept from each search-result record.
listing_columns = ['job_name','company_name','job_href','attribute_text','companyind_text','issuedate']

r = session.get(generate_url)
results = _load_search_result(r)

# Total number of result pages
total_page = results['total_page']

# Build the list of page URLs: the page number is the part before ".html"
# in the ninth comma-separated piece of the URL.
url_split = generate_url.split(',')
page_url_split = url_split[8].split('.')
url_group = []

for page_number in range(1, int(total_page) + 1):
    page_url_split[0] = str(page_number)
    url_split[8] = ".".join(page_url_split)
    url_group.append(",".join(url_split))

# Collect one DataFrame per page and save the combined table
list_df = []

for page_number, page_url in enumerate(url_group, start=1):
    if page_number > 1:
        # Page 1 was already downloaded above; only fetch the later pages.
        r = session.get(page_url)
        results = _load_search_result(r)
    # columns= keeps only the wanted fields and tolerates an empty result list.
    df = pd.DataFrame(results['engine_search_result'], columns=listing_columns)
    list_df.append(df)

# pd.concat() rejects an empty list, so fall back to an empty table.
if list_df:
    df_all = pd.concat(list_df).reset_index()
else:
    df_all = pd.DataFrame(columns=listing_columns).reset_index()

# The workbook cannot be created unless its directory exists.
os.makedirs('data_out', exist_ok=True)
with pd.ExcelWriter('data_out/51job.xlsx',mode='w',engine="openpyxl") as writer:
    df_all.to_excel(writer, sheet_name='职位列表页')


# 6. Fetch the detail pages
href_list = df_all['job_href'].tolist()

# Remove every backslash from the links so they can be requested directly.
href_list_accurate = [href.replace('\\', '') for href in href_list]

# Build one small DataFrame per detail page, then save the combined table.
detail_df = {}
total_df = []

# Only the first 25 links are visited.
for detail_url in href_list_accurate[:25]:
    r = session.get(detail_url)
    职位信息_xpath = '//div[@class="tBorderTop_box"]/div[@class="bmsg job_msg inbox"]/p/text()'
    职位名称_xpath = '//div[@class="cn"]/h1/@title'
    detail_df.update({
        # The title XPath result is a list; it determines the row count of
        # the per-page frame, the two scalar fields are broadcast across it.
        "职位名称": r.html.xpath(职位名称_xpath),
        # Description paragraphs joined into a single string.
        "职位信息": "".join(r.html.xpath(职位信息_xpath)),
        "链接": detail_url,
    })
    total_df.append(pd.DataFrame(detail_df))

df_all = pd.concat(total_df).reset_index()

# Append a second sheet to the workbook written for the listing pages.
with pd.ExcelWriter('data_out/51job.xlsx', engine="openpyxl", mode='a') as writer:
    df_all.to_excel(writer, sheet_name='职位详细页_前25')